#! /bin/bash
#
# Per-sample GATK preprocessing: duplicate marking followed by base quality
# score recalibration (BQSR) of a merged BWA alignment.
#
# Usage: <this script> <sampleID> <sampleDir>
#   sampleID   sample name; used as the prefix of every input/output file name
#   sampleDir  directory holding <sampleID>_BWA_merge.bam; results and the log
#              are written to <sampleDir>/GATK

# Refuse to run without both arguments: empty values would make every derived
# path below point at the filesystem root (e.g. /GATK/_GATK.log).
if [[ $# -lt 2 || -z "$1" || -z "$2" ]]; then
  printf 'Usage: %s <sampleID> <sampleDir>\n' "${0##*/}" >&2
  exit 2
fi
sampleID=$1
sampleDir=$2
if [[ ! -d "$sampleDir" ]]; then
  printf 'error: sample directory not found: %s\n' "$sampleDir" >&2
  exit 1
fi

# Reference genome and known-variant resources
ref=/data/fs09/wangzf/nanopore/ztf/HCC/ref/hg38_mainChr.fa
dbsnp=/data/fs01/wangzf/nanopore/ref/dbsnp_146.hg38.vcf.gz
dbsnp1000G=/data/fs01/wangzf/nanopore/ref/1000G_phase1.snps.high_confidence.hg38.vcf.gz
# NOTE(review): this is the same path as dbsnp1000G (the 1000G *SNP* file);
# an indel resource was presumably intended here - confirm the correct file.
dbindel1000G=/data/fs01/wangzf/nanopore/ref/1000G_phase1.snps.high_confidence.hg38.vcf.gz
hapmap=/data/fs01/wangzf/nanopore/ref/hapmap_3.3.hg38.vcf.gz
omni=/data/fs01/wangzf/nanopore/ref/1000G_omni2.5.hg38.vcf.gz
mills=/data/fs01/wangzf/nanopore/ref/Mills_and_1000G_gold_standard.indels.hg38.vcf.gz

# Per-sample alignment files
sam=${sampleDir}/${sampleID}.sam
bam=${sampleDir}/${sampleID}.bam
bam_merge=${sampleDir}/${sampleID}_BWA_merge.bam
# bam_sort=${sampleDir}/${sampleID}_BWA_merge_sorted.bam

# Per-sample GATK output directory and the files written into it
sampleDir_gatk=${sampleDir}/GATK
# The directory holds the log, every output and the Java tmpdir, so it must
# exist before the first log line is appended.
if ! mkdir -p -- "$sampleDir_gatk"; then
  printf 'error: cannot create GATK output directory: %s\n' "$sampleDir_gatk" >&2
  exit 1
fi
logfile=${sampleDir_gatk}/${sampleID}_GATK.log
bam_markdup=${sampleDir_gatk}/${sampleID}_BWA_merge_sorted_markdup.bam
matrix_markdup=${sampleDir_gatk}/${sampleID}_BWA_merge_sorted_markdup.txt
recal_data_table=${sampleDir_gatk}/${sampleID}_BWA_merge_sorted_markdup.recal_data.table
bam_BQSR=${sampleDir_gatk}/${sampleID}_BWA_merge_sorted_markdup_BQSR.bam
hc_vcf=${sampleDir_gatk}/${sampleID}_HC.vcf.gz
snp_recal=${sampleDir_gatk}/${sampleID}_HC.snp.recal
snp_recal_tranches=${sampleDir_gatk}/${sampleID}_HC.snp.tranches
snp_recal_rscript=${sampleDir_gatk}/${sampleID}_HC.snp.plots.R
snp_VQSR=${sampleDir_gatk}/${sampleID}_HC.snp.VQSR.vcf.gz
snp_VQSR_select=${sampleDir_gatk}/${sampleID}_HC.snp.VQSR.select.vcf.gz
indel_recal=${sampleDir_gatk}/${sampleID}_HC.indel.recal
indel_recal_tranches=${sampleDir_gatk}/${sampleID}_HC.indel.tranches
indel_recal_rscript=${sampleDir_gatk}/${sampleID}_HC.indel.plots.R
indel_VQSR=${sampleDir_gatk}/${sampleID}_HC.indel.VQSR.vcf.gz
indel_VQSR_select=${sampleDir_gatk}/${sampleID}_HC.indel.VQSR.select.vcf.gz

# Tools
gatk=/data/fs01/wangzf/nanopore/ref/gatk-4.1.2.0/gatk
# Step 4: mark duplicates in the merged BAM, then index the result.
# Each command's exit status is checked so that a failure is recorded in the
# log and stops the script, instead of logging "finish" and letting later
# steps run on a missing or incomplete BAM.
echo "$(date) 4. mark dup start: $sampleID" >> "$logfile"
"${gatk}" --java-options "-Xmx8G -Djava.io.tmpdir=${sampleDir_gatk}" \
  MarkDuplicates \
  -I "$bam_merge" \
  -O "$bam_markdup" \
  -M "$matrix_markdup" \
  || {
    rc=$?
    echo "$(date) 4. mark dup FAILED (MarkDuplicates, exit ${rc}): $sampleID" >> "$logfile"
    exit "$rc"
  }
/data/fs01/biosoft/samtools-1.9/samtools index -@ 20 "$bam_markdup" \
  || {
    rc=$?
    echo "$(date) 4. mark dup FAILED (samtools index, exit ${rc}): $sampleID" >> "$logfile"
    exit "$rc"
  }
echo "$(date) 4. mark dup finish: $sampleID" >> "$logfile"
# Step 5: base quality score recalibration (BQSR).
# 5-1 builds the recalibration table from the duplicate-marked BAM;
# 5-2 applies that table and writes the recalibrated BAM.
# Each command's exit status is checked so that 5-2 never runs without a
# table from 5-1 and "finish" is never logged for a failed step.
echo "$(date) 5-1. BQSR BaseRecalibrator start: $sampleID" >> "$logfile"
# Known sites: dbSNP, 1000G high-confidence SNPs, and the Mills/1000G indels.
# The third resource was previously ${dbindel1000G}, which is defined as the
# very same file as ${dbsnp1000G}, so the SNP set was passed twice and no
# indel resource was used.
# NOTE(review): confirm ${mills} is the intended indel set and is indexed.
"${gatk}" --java-options "-Xmx8G -Djava.io.tmpdir=${sampleDir_gatk}" \
  BaseRecalibrator \
  -R "$ref" \
  -I "$bam_markdup" \
  -O "$recal_data_table" \
  --known-sites "${dbsnp}" \
  --known-sites "${dbsnp1000G}" \
  --known-sites "${mills}" \
  || {
    rc=$?
    echo "$(date) 5-1. BQSR BaseRecalibrator FAILED (exit ${rc}): $sampleID" >> "$logfile"
    exit "$rc"
  }
echo "$(date) 5-1. BQSR BaseRecalibrator finish: $sampleID" >> "$logfile"
echo "$(date) 5-2. BQSR ApplyBQSR start: $sampleID" >> "$logfile"
"${gatk}" --java-options "-Xmx8G -Djava.io.tmpdir=${sampleDir_gatk}" \
  ApplyBQSR \
  -R "$ref" \
  -I "$bam_markdup" \
  --bqsr-recal-file "$recal_data_table" \
  -O "$bam_BQSR" \
  || {
    rc=$?
    echo "$(date) 5-2. BQSR ApplyBQSR FAILED (exit ${rc}): $sampleID" >> "$logfile"
    exit "$rc"
  }
echo "$(date) 5-2. BQSR ApplyBQSR finish: $sampleID" >> "$logfile"

